######################################
# Media Measurement Matters          #
# Replication Code                   #
# Appendix R: Sensitivity Analysis   #
######################################

# The following file contains code for replicating the results in Appendix R. 
# Appendix R conducts sensitivity analyses for the stated preference results, using 
# the approach recommended by Knox et al. (2019). 

# Caution: this code may take some time to run. We have provided all relevant output
# in the sensitivity sub-folder, if you want to skip straight to the section titled
# "Identify where sensitivity bounds cross zero."

# Set-up ----

# If desired, set up path into which to save plots and tables
plot_path <- NULL
table_path <- NULL

# Load libraries
library(tidyverse)
library(reshape2)
library(MASS)    # for mvrnorm()
library(gtools)  # for rdirichlet()
library(lpSolve)

# Set up colors
red_mit = '#A31F34'
red_light = '#A9606C'
blue_mit = '#315485'
grey_light= '#C2C0BF'
grey_dark = '#8A8B8C'
black = '#353132'

# Helper function
'%.%' <- function(x,y) paste(x,y,sep='')

# Sensitivity analysis for stated preferences ----

# Read in survey data
srvy <- read_rds("data/survey_data_cleaned.rds")

# Source sensitivity analysis function
source("sensitivity/bounds_frechet_function.R")

# > Set-up ----

# Set up raw survey data
d.raw <- srvy %>% as.data.frame()

# Define labels
med_labels = c("Entertainment", "Fox", "MSNBC") # Stated preference groups
J = length(med_labels) # Number of stated preference groups

outcomes = c("charter_index", "actions_index")
outcome_labels = c(charter_index = "Attitudinal Index", actions_index = "Sharing Index")

# Set range for dependent variables
y.min = 0
y.max = 1

# > Construct bounds ----

# Calculate sensitivity bounds, using the bounds.frechet() function from
# Knox et al. (2019).
  # Estimated run-time: 18 minutes
  # Note: If you want to skip this step and use existing output, run the code as is.
        # If you want to run the code yourself, change the content of output.fname and run.

for (outcome in outcomes){
  
  set.seed(02139)
  
  cat("outcome:", outcome, '\n')
  
  output.fname = 'sensitivity/results_' %.% outcome %.% '_use.RData'
  if (file.exists(output.fname)){
    next
  }
  
  # Create dataframe
  d <- d.raw %>% 
    mutate(S = case_when(med_pref == "Fox" ~ 1, med_pref == "MSNBC" ~ 2, 
                         med_pref == "Entertainment" ~ 3),
           A = case_when(article_read == "Fox" ~ 1, article_read == "MSNBC" ~ 2, 
                         article_read == "Entertainment" ~ 3),
           C = case_when(med_choice == "Fox" ~ 1, med_choice == "MSNBC" ~ 2, 
                         med_choice == "Entertainment" ~ 3),
           D = forcedchoice,
           Y = get(outcome)) %>% 
    dplyr::select(S, A, C, D, Y)
  
  # Drop incomplete cases
  drop = which(is.na(d$S) | is.na(d$D) | is.na(d$A) | 
                 is.na(d$Y) | (is.na(d$C) & d$D==0))
  if (length(drop) > 0) d = d[-drop,]
  
  # Check data before running bounds
  if (any(d$Y < y.min | d$Y > y.max,na.rm=T)) stop("outcome values inconsistent with given upper/lower bounds")
  all.equal(as.numeric(d[d$D==0,]$C),
            as.numeric(d[d$D==0,]$A)) # Assignment always equals choice in free condition
  all(is.na(d$C[d$D==1])) # No media choice recorded for forced condition
  !any(is.na(d[d$D==0,])) # No missing data in free condition
  !any(is.na(d[d$D==1,-which(colnames(d)=="C")])) # No missing data in forced condition, except choice
  
  # Calculate bounds
  effects.mat = matrix(FALSE, 3, 3)
  effects.mat[3, 1] = TRUE # for Fox vs Ent
  effects.mat[3, 2] = TRUE # for MSNBC vs Ent
  
  out = bounds.frechet(
    data = d,
    effects.mat = effects.mat,
    posterior = TRUE,
    n_dir_MC = 500,
    n_norm_MC = 10,
    sensitivity = TRUE,
    rhos = seq(0, .25, .01),
    hpd = TRUE,
    alpha = .95
  )
  
  # Save results to file
  save(out, file=output.fname)
  
  cat('=====\n')
  
}

# > Wrangle results for plotting ----

# Remove existing versions of files (if have run previously)
rm(naive.all, naive.ci.all,
   bounds.all, bounds.ci.all,
   sens.all, sens.ci.all)

for (outcome in outcomes){
  
  load('sensitivity/results_' %.% outcome %.% '_use.RData')
  
  # Merge all naive estimates together
  out$naive$effects$rho = 0 # Plot naive estimate at left side of plot
  out$naive$effects$outcome = outcome
  if (exists('naive.all')){
    naive.all = rbind(naive.all, out$naive$effects)
  } else {
    naive.all = out$naive$effects
  }
  
  # Merge naive CIs together
  naive.ci = out$posterior$naive_effects.ci
  naive.ci$rho = 0 # Plot naive estimate at left side of plot
  naive.ci$outcome = outcome
  if (exists('naive.ci.all')){
    naive.ci.all = rbind(naive.ci.all, naive.ci)
  } else {
    naive.ci.all = naive.ci
  }
  
  # Merge all bounds together
  out$effects$rho = .175 # Plot bounds at right side of plot
  out$effects$outcome = outcome
  if (exists('bounds.all')){
    bounds.all = rbind(bounds.all, out$effects)
  } else {
    bounds.all = out$effects
  }
  
  # Merge bounds CIs together
  effects.ci = out$posterior$effects.ci
  effects.ci$rho = .175 # Plot bounds at right side of plot
  effects.ci$outcome = outcome
  if (exists('bounds.ci.all')){
    bounds.ci.all = rbind(bounds.ci.all, effects.ci)
  } else {
    bounds.ci.all = effects.ci
  }
  
  # Merge all sensitivity results together
  out$sensitivity$effects$outcome = outcome
  if (exists('sens.all')){
    sens.all = rbind(sens.all, out$sensitivity$effects)
  } else {
    sens.all = out$sensitivity$effects
  }
  
  # Merge sensitivity CIs together
  sens.ci = out$posterior$sens_effects.ci
  sens.undef.ind = which(is.na(out$sensitivity$effects$min) | is.na(out$sensitivity$effects$max))
  sens.ci$min_cilo[sens.undef.ind] = sens.ci$max_cihi[sens.undef.ind] = NA
  sens.ci$outcome = outcome
  if (exists('sens.ci.all')){
    sens.ci.all = rbind(sens.ci.all, sens.ci)
  } else {
    sens.ci.all = sens.ci
  }
  
  rm(out)
  
}

# Prepare labels for plotting

# Set up the outcome as a factor, append appropriate labels
naive.all$outcome <- factor(naive.all$outcome, levels = outcomes,
                            labels = outcome_labels, ordered = TRUE)
naive.ci.all$outcome <- factor(naive.ci.all$outcome, levels = outcomes,
                               labels = outcome_labels, ordered = TRUE)
bounds.all$outcome <- factor(bounds.all$outcome, levels = outcomes,
                             labels = outcome_labels, ordered = TRUE)
bounds.ci.all$outcome <- factor(bounds.ci.all$outcome, levels = outcomes,
                                labels = outcome_labels, ordered = TRUE)
sens.all$outcome <- factor(sens.all$outcome, levels = outcomes,
                           labels = outcome_labels, ordered = TRUE)
sens.ci.all$outcome <- factor(sens.ci.all$outcome, levels = outcomes,
                              labels = outcome_labels, ordered = TRUE)

# Set up the stated preference category as a factor, append labels
med_labels_use <- "Prefer " %.% med_labels[c(3, 1, 2)]
med_levels <- c(2, 3, 1)

naive.all$C <- factor(naive.all$C, levels = med_levels, 
                      labels = med_labels_use, ordered = TRUE)
naive.ci.all$C <- factor(naive.ci.all$C, levels = med_levels, 
                         labels = med_labels_use, ordered = TRUE)
bounds.all$C <- factor(bounds.all$C, levels = med_levels, 
                       labels = med_labels_use, ordered = TRUE)
bounds.ci.all$C <- factor(bounds.ci.all$C, levels = med_levels, 
                          labels = med_labels_use, ordered = TRUE)
sens.all$C <- factor(sens.all$C, levels = med_levels, 
                     labels = med_labels_use, ordered = TRUE)
sens.ci.all$C <- factor(sens.ci.all$C, levels = med_levels, 
                        labels = med_labels_use, ordered = TRUE)

# > Plot results ----

# Set theme for plotting
theme_set(theme_bw() + 
            theme(legend.position = "bottom",
                  plot.title = element_text(hjust = 0.5, face = "bold",size = 16),
                  plot.subtitle = element_text(hjust = 0.5, face = "italic", size = 12),
                  axis.title.x = element_text(margin = unit(c(3, 0, 0, 0), "mm"),
                                              face = "bold", size = 12, angle = 0, hjust = 0.5),
                  axis.title.y = element_text(margin = unit(c(0, 3, 0, 0), "mm"), size = 12),
                  legend.title = element_text(face = "bold", hjust = 0.5, size = 12),
                  legend.text = element_text(hjust = 0.5, size = 10),
                  axis.text.y = element_text(size = 10, color = "black"),
                  legend.box = "vertical",
                  legend.background = element_blank(),
                  legend.box.background = element_rect(colour = "black")))

# Limit plots to area where sensitivity analyses haven't flattened:
sens.ci.all <- sens.ci.all %>%
  dplyr::group_by(C, treat, ctrl, outcome) %>%
  dplyr::mutate(min_min_cilo = min(min_cilo,na.rm=T),
         max_max_cihi = max(max_cihi,na.rm=T),
         rhomax_cilo = rho[which.min(min_cilo)],
         rhomax_cihi = rho[which.max(max_cihi)]
  ) %>%
  ungroup() %>%
  dplyr::mutate(gtmaxrho = ifelse(rho>rhomax_cilo & rho>rhomax_cihi,1,0))
sens.all <- left_join(sens.all,
                      dplyr::select(sens.ci.all, C, treat, ctrl, 
                                    outcome,rho, gtmaxrho),
                      by=c("C","treat","ctrl","rho", "outcome"))

sens.all$min[which(sens.all$gtmaxrho==1)] <- NA
sens.all$max[sens.all$gtmaxrho==1] <- NA
sens.ci.all$min_cilo[sens.ci.all$gtmaxrho==1] <- NA
sens.ci.all$max_cihi[sens.ci.all$gtmaxrho==1] <- NA

# Figure R1: sensitivity analyses for comparison of Fox versus entertainment
(r1 <- ggplot(filter(sens.ci.all,treat==1), aes(x=rho)) +
    geom_ribbon(aes(ymin=min_cilo,ymax=max_cihi),
                data=subset(sens.ci.all,rho < Inf & treat==1),
                fill=grey_light) +
    geom_ribbon(aes(ymin=min,ymax=max),
                data=subset(sens.all,rho < Inf & treat==1),
                fill=grey_dark) +
    geom_hline(yintercept=0, linetype="dashed", col = black) +
    geom_errorbar(aes(ymin=min_cilo, ymax=max_cihi, colour="naive"),
                  data=filter(naive.ci.all,treat==1),
                  width=.015, size=.5) +
    geom_point(aes(y=naive, colour="naive"),
               data=filter(naive.all,treat==1)) +
    geom_errorbar(aes(ymin=min_cilo, ymax=max_cihi,x=0.21, colour="bounds"),
                  data=filter(bounds.ci.all,treat==1),
                  width=.015, size=.5) +
    geom_errorbar(aes(ymin=min, ymax=max,x=0.21, colour="bounds"),
                  data=filter(bounds.all,treat==1),
                  width=.01, size=1) +
    facet_grid(outcome ~ C,scales = "free") +
    scale_x_continuous(breaks=seq(0, .2, .05),labels=seq(0, .2, .05),limits=c(-0.015,0.225)) +
    xlab(expression("Sensitivity parameter " * (rho))) +
    scale_y_continuous(breaks=seq(-.3, .3, .1), limits = c(-0.225, 0.225),
                       labels=seq.int(from=-.30, to=.30, length.out=7)) +
    ylab("Expected change in outcome") +
    scale_colour_manual(values=c(naive=blue_mit, bounds=red_mit),
                        guide="none") +
    theme(text=element_text(colour=black),
          axis.text = element_text(size = 10.5),
          strip.text = element_text(size = 10.5)))

ggsave(r1, path = plot_path, filename = "fig_r1.pdf", 
       width=8, height=4, dpi = 600)

# Figure R2: sensitivity analyses for comparison of MSNBC versus entertainment
(r2 <- ggplot(filter(sens.ci.all,treat==2), aes(x=rho)) +
    geom_ribbon(aes(ymin=min_cilo,ymax=max_cihi),
                data=subset(sens.ci.all,rho < Inf & treat==2),
                fill=grey_light) +
    geom_ribbon(aes(ymin=min,ymax=max),
                data=subset(sens.all,rho < Inf & treat==2),
                fill=grey_dark) +
    geom_hline(yintercept=0, linetype="dashed", col = black) +
    geom_errorbar(aes(ymin=min_cilo, ymax=max_cihi, colour="naive"),
                  data=filter(naive.ci.all,treat==2),
                  width=.015, size=.5) +
    geom_point(aes(y=naive, colour="naive"),
               data=filter(naive.all,treat==2)) +
    geom_errorbar(aes(ymin=min_cilo, ymax=max_cihi,x=0.21, colour="bounds"),
                  data=filter(bounds.ci.all,treat==2),
                  width=.015, size=.5) +
    geom_errorbar(aes(ymin=min, ymax=max,x=0.21, colour="bounds"),
                  data=filter(bounds.all,treat==2),
                  width=.01, size=1) +
    facet_grid(outcome ~ C,scales = "free") +
    scale_x_continuous(breaks=seq(0, .2, .05),labels=seq(0, .2, .05),limits=c(-0.015,0.225)) +
    scale_y_continuous(breaks=seq(-.3, .3, .1), limits = c(-0.225, 0.225),
                       labels=seq.int(from=-.30, to=.30, length.out=7)) +
    xlab(expression("Sensitivity parameter " * (rho))) +
    ylab("Expected change in outcome") +
    scale_colour_manual(values=c(naive=blue_mit, bounds=red_mit),
                        guide="none") +
    theme(text=element_text(colour=black),
          axis.text = element_text(size = 10.5),
          strip.text = element_text(size = 10.5)))

ggsave(r2, path = plot_path, filename = "fig_r2.pdf", 
       width=8, height=4, dpi = 600)
